{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import math\n",
    "\n",
    "from collections import Counter\n",
    "\n",
    "import scipy as sp\n",
    "#from scipy.stats import nanmean\n",
    "#from scipy.stats import nanstd\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['game_id', 'condition', 'round_index', 'correct_answer',\n",
       "       'plot_sequence', 'player_id', 'cumulative_score', 'difficulty',\n",
       "       'revised_guess', 'revised_error', 'alter_1', 'alter_2', 'alter_3',\n",
       "       'increment', 'increment_color', 'in_degree', 'study',\n",
       "       'alter_1_revised_guess', 'alter_2_revised_guess',\n",
       "       'alter_3_revised_guess', 'alter_1_independent_guess',\n",
       "       'alter_2_independent_guess', 'alter_3_independent_guess',\n",
       "       'round_after_shock', 'quarter', 'task_instance_avg_error',\n",
       "       'revised_error_relative2solo', 'independent_error_relative2solo',\n",
       "       'independent_guess', 'independent_error', 'half', 'best_player',\n",
       "       'ind_score', 'cumulative_ind_score', 'final_score', 'final_ind_score',\n",
       "       'improvement', 'group', 'best_player_initial', 'best_player_revised',\n",
       "       'current_rev_score', 'current_ind_score', 'half_cumulative'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the per-player, per-round observations of all studies.\n",
    "round_data = pd.read_csv('./all_studies_round_data.csv')\n",
    "\n",
    "# Score accumulated *before* the current round: remove this round's own\n",
    "# contribution from the running total.\n",
    "round_data['current_rev_score'] = round_data['cumulative_score'] - round_data['increment']\n",
    "# cumulative_ind_score is the running sum of ind_score (see get_score_and_cum_score),\n",
    "# so the independent total has to drop ind_score rather than `increment`.\n",
    "# NOTE(review): assumes the CSV already carries ind_score next to\n",
    "# cumulative_ind_score (both appear in the column listing below) - confirm.\n",
    "round_data['current_ind_score'] = round_data['cumulative_ind_score'] - round_data['ind_score']\n",
    "\n",
    "# Starting value for the per-half running score; the rows of rounds > 10 are\n",
    "# replaced by a second-half-only running sum in the next cell.\n",
    "round_data['half_cumulative'] = round_data['cumulative_score']\n",
    "\n",
    "round_data.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>player_id</th>\n",
       "      <th>round_index</th>\n",
       "      <th>increment</th>\n",
       "      <th>cumulative_score</th>\n",
       "      <th>final_score</th>\n",
       "      <th>half_cumulative</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>954</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>1</td>\n",
       "      <td>0.70</td>\n",
       "      <td>0.70</td>\n",
       "      <td>15.02</td>\n",
       "      <td>0.70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1917</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>2</td>\n",
       "      <td>0.48</td>\n",
       "      <td>1.18</td>\n",
       "      <td>15.02</td>\n",
       "      <td>1.18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2924</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>3</td>\n",
       "      <td>0.92</td>\n",
       "      <td>2.10</td>\n",
       "      <td>15.02</td>\n",
       "      <td>2.10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5559</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>4</td>\n",
       "      <td>0.84</td>\n",
       "      <td>2.94</td>\n",
       "      <td>15.02</td>\n",
       "      <td>2.94</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5885</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>5</td>\n",
       "      <td>0.64</td>\n",
       "      <td>3.58</td>\n",
       "      <td>15.02</td>\n",
       "      <td>3.58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7708</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>6</td>\n",
       "      <td>0.90</td>\n",
       "      <td>4.48</td>\n",
       "      <td>15.02</td>\n",
       "      <td>4.48</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9393</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>7</td>\n",
       "      <td>0.83</td>\n",
       "      <td>5.31</td>\n",
       "      <td>15.02</td>\n",
       "      <td>5.31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10559</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>8</td>\n",
       "      <td>0.96</td>\n",
       "      <td>6.27</td>\n",
       "      <td>15.02</td>\n",
       "      <td>6.27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11714</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>9</td>\n",
       "      <td>0.96</td>\n",
       "      <td>7.23</td>\n",
       "      <td>15.02</td>\n",
       "      <td>7.23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13513</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>10</td>\n",
       "      <td>0.68</td>\n",
       "      <td>7.91</td>\n",
       "      <td>15.02</td>\n",
       "      <td>7.91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14573</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>11</td>\n",
       "      <td>0.90</td>\n",
       "      <td>8.81</td>\n",
       "      <td>15.02</td>\n",
       "      <td>0.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16009</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>12</td>\n",
       "      <td>0.81</td>\n",
       "      <td>9.62</td>\n",
       "      <td>15.02</td>\n",
       "      <td>1.71</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18309</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>13</td>\n",
       "      <td>0.63</td>\n",
       "      <td>10.25</td>\n",
       "      <td>15.02</td>\n",
       "      <td>2.34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19822</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>14</td>\n",
       "      <td>0.74</td>\n",
       "      <td>10.99</td>\n",
       "      <td>15.02</td>\n",
       "      <td>3.08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20530</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>15</td>\n",
       "      <td>0.32</td>\n",
       "      <td>11.31</td>\n",
       "      <td>15.02</td>\n",
       "      <td>3.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22447</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>16</td>\n",
       "      <td>0.97</td>\n",
       "      <td>12.28</td>\n",
       "      <td>15.02</td>\n",
       "      <td>4.37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23898</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>17</td>\n",
       "      <td>0.71</td>\n",
       "      <td>12.99</td>\n",
       "      <td>15.02</td>\n",
       "      <td>5.08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24830</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>18</td>\n",
       "      <td>0.81</td>\n",
       "      <td>13.80</td>\n",
       "      <td>15.02</td>\n",
       "      <td>5.89</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26659</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>19</td>\n",
       "      <td>0.54</td>\n",
       "      <td>14.34</td>\n",
       "      <td>15.02</td>\n",
       "      <td>6.43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27319</th>\n",
       "      <td>rFbSt2rKkEKdinL88</td>\n",
       "      <td>20</td>\n",
       "      <td>0.68</td>\n",
       "      <td>15.02</td>\n",
       "      <td>15.02</td>\n",
       "      <td>7.11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               player_id  round_index  increment  cumulative_score  \\\n",
       "954    rFbSt2rKkEKdinL88            1       0.70              0.70   \n",
       "1917   rFbSt2rKkEKdinL88            2       0.48              1.18   \n",
       "2924   rFbSt2rKkEKdinL88            3       0.92              2.10   \n",
       "5559   rFbSt2rKkEKdinL88            4       0.84              2.94   \n",
       "5885   rFbSt2rKkEKdinL88            5       0.64              3.58   \n",
       "7708   rFbSt2rKkEKdinL88            6       0.90              4.48   \n",
       "9393   rFbSt2rKkEKdinL88            7       0.83              5.31   \n",
       "10559  rFbSt2rKkEKdinL88            8       0.96              6.27   \n",
       "11714  rFbSt2rKkEKdinL88            9       0.96              7.23   \n",
       "13513  rFbSt2rKkEKdinL88           10       0.68              7.91   \n",
       "14573  rFbSt2rKkEKdinL88           11       0.90              8.81   \n",
       "16009  rFbSt2rKkEKdinL88           12       0.81              9.62   \n",
       "18309  rFbSt2rKkEKdinL88           13       0.63             10.25   \n",
       "19822  rFbSt2rKkEKdinL88           14       0.74             10.99   \n",
       "20530  rFbSt2rKkEKdinL88           15       0.32             11.31   \n",
       "22447  rFbSt2rKkEKdinL88           16       0.97             12.28   \n",
       "23898  rFbSt2rKkEKdinL88           17       0.71             12.99   \n",
       "24830  rFbSt2rKkEKdinL88           18       0.81             13.80   \n",
       "26659  rFbSt2rKkEKdinL88           19       0.54             14.34   \n",
       "27319  rFbSt2rKkEKdinL88           20       0.68             15.02   \n",
       "\n",
       "       final_score  half_cumulative  \n",
       "954          15.02             0.70  \n",
       "1917         15.02             1.18  \n",
       "2924         15.02             2.10  \n",
       "5559         15.02             2.94  \n",
       "5885         15.02             3.58  \n",
       "7708         15.02             4.48  \n",
       "9393         15.02             5.31  \n",
       "10559        15.02             6.27  \n",
       "11714        15.02             7.23  \n",
       "13513        15.02             7.91  \n",
       "14573        15.02             0.90  \n",
       "16009        15.02             1.71  \n",
       "18309        15.02             2.34  \n",
       "19822        15.02             3.08  \n",
       "20530        15.02             3.40  \n",
       "22447        15.02             4.37  \n",
       "23898        15.02             5.08  \n",
       "24830        15.02             5.89  \n",
       "26659        15.02             6.43  \n",
       "27319        15.02             7.11  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Running score restricted to the second half (rounds > 10), per player.\n",
    "# Rows are ordered by round first so the running sum does not depend on the\n",
    "# row order the frame happens to have when this cell is executed.\n",
    "second_half_cumulative = (\n",
    "    round_data[round_data.round_index > 10]\n",
    "    .sort_values('round_index', kind='stable')\n",
    "    .groupby('player_id')['increment']\n",
    "    .cumsum()\n",
    ")\n",
    "\n",
    "# Rounds <= 10 (absent from the series above) keep their current value.\n",
    "# A plain assignment replaces the chained `fillna(..., inplace=True)`, which is\n",
    "# not guaranteed to write back into the frame under pandas Copy-on-Write.\n",
    "round_data['half_cumulative'] = (\n",
    "    second_half_cumulative\n",
    "    .reindex(round_data.index)\n",
    "    .fillna(round_data['half_cumulative'])\n",
    ")\n",
    "\n",
    "# Spot check on one player: half_cumulative should restart at round 11.\n",
    "round_data[round_data.player_id == \"rFbSt2rKkEKdinL88\"][['player_id',\n",
    "            'round_index','increment','cumulative_score','final_score', 'half_cumulative']].sort_values(\"round_index\", ascending=True)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Compute additional columns:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['game_id', 'condition', 'round_index', 'correct_answer',\n",
       "       'plot_sequence', 'player_id', 'cumulative_score', 'difficulty',\n",
       "       'revised_guess', 'revised_error', 'alter_1', 'alter_2', 'alter_3',\n",
       "       'increment', 'increment_color', 'in_degree', 'study',\n",
       "       'alter_1_revised_guess', 'alter_2_revised_guess',\n",
       "       'alter_3_revised_guess', 'alter_1_independent_guess',\n",
       "       'alter_2_independent_guess', 'alter_3_independent_guess',\n",
       "       'round_after_shock', 'quarter', 'task_instance_avg_error',\n",
       "       'revised_error_relative2solo', 'independent_error_relative2solo',\n",
       "       'independent_guess', 'independent_error', 'half', 'best_player',\n",
       "       'ind_score', 'cumulative_ind_score', 'final_score', 'final_ind_score',\n",
       "       'improvement', 'group', 'best_player_initial', 'best_player_revised',\n",
       "       'current_rev_score', 'current_ind_score', 'half_cumulative'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def get_score_and_cum_score(round_data):\n",
    "    \"\"\"Return a round-ordered copy of `round_data` with the score columns (re)computed.\n",
    "\n",
    "    Columns written:\n",
    "      ind_score            -- 1 - |independent_guess - correct_answer| of the row\n",
    "      cumulative_ind_score -- running sum of ind_score within each player_id\n",
    "      final_score          -- largest cumulative_score of each player_id\n",
    "      final_ind_score      -- largest cumulative_ind_score of each player_id\n",
    "\n",
    "    The rows of the returned frame are sorted by round_index; the frame passed\n",
    "    in is left as it was.\n",
    "    \"\"\"\n",
    "    scored = round_data.sort_values(by='round_index')\n",
    "\n",
    "    guess_gap = scored['independent_guess'] - scored['correct_answer']\n",
    "    scored['ind_score'] = 1 - guess_gap.abs()\n",
    "\n",
    "    # The running sum relies on the round ordering established above.\n",
    "    scored['cumulative_ind_score'] = scored.groupby('player_id')['ind_score'].cumsum()\n",
    "    scored['final_score'] = scored.groupby('player_id')['cumulative_score'].transform('max')\n",
    "    scored['final_ind_score'] = scored.groupby('player_id')['cumulative_ind_score'].transform('max')\n",
    "\n",
    "    return scored\n",
    "\n",
    "# Recompute the score columns on the loaded data, then list all columns.\n",
    "round_data = get_score_and_cum_score(round_data)\n",
    "round_data.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2. Make game data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _top_k_errors(round_rows, quarter_rows, correct_an, max_k=12):\n",
    "    \"\"\"Error of the mean revised guess of the k best-ranked players, for k = 1..max_k.\n",
    "\n",
    "    round_rows   -- rows of one game restricted to the round being scored\n",
    "    quarter_rows -- rows of the same game at the reference round; their\n",
    "                    half_cumulative ranks players for the '_rev_ex_ante' columns\n",
    "    correct_an   -- correct answer used for the absolute error\n",
    "\n",
    "    Returns a dict holding, for every k, the keys top_<k>_ind, top_<k>_rev,\n",
    "    top_<k>_ind_live, top_<k>_rev_live and top_<k>_rev_ex_ante (in that order).\n",
    "    \"\"\"\n",
    "    # None of the rankings depends on k: sort once, then take head(k) below.\n",
    "    ranked_by = {\n",
    "        # ex post: ranked by final_ind_score / final_score\n",
    "        '_ind': round_rows.sort_values('final_ind_score', ascending=False),\n",
    "        '_rev': round_rows.sort_values('final_score', ascending=False),\n",
    "        # live: ranked by the score up to now\n",
    "        '_ind_live': round_rows.sort_values('current_ind_score', ascending=False),\n",
    "        '_rev_live': round_rows.sort_values('current_rev_score', ascending=False),\n",
    "    }\n",
    "    quarter_ranked = quarter_rows.sort_values('half_cumulative', ascending=False)\n",
    "\n",
    "    errors = {}\n",
    "    for k in range(1, max_k + 1):\n",
    "        for suffix, ranked in ranked_by.items():\n",
    "            top_k_an = ranked.head(k).revised_guess.mean()\n",
    "            errors['top_' + str(k) + suffix] = abs(top_k_an - correct_an)\n",
    "\n",
    "        # ex ante: choose the players from the reference-round ranking, then\n",
    "        # average the guesses those players gave in the round being scored\n",
    "        top_k_players = quarter_ranked.head(k).player_id.tolist()\n",
    "        top_k_an = round_rows[round_rows.player_id.isin(top_k_players)].revised_guess.mean()\n",
    "        errors['top_' + str(k) + '_rev_ex_ante'] = abs(top_k_an - correct_an)\n",
    "\n",
    "    return errors\n",
    "\n",
    "\n",
    "def _game_round_records(game_id, condition, game_rows, n_rounds=20, max_k=12):\n",
    "    \"\"\"Build one summary dict per round (1..n_rounds) for the players in game_rows.\"\"\"\n",
    "    # Reference rounds for the ex-ante ranking: round 5 is used for rounds 1-10,\n",
    "    # round 15 for the later rounds.\n",
    "    first_half_reference = game_rows[game_rows.round_index == 5]\n",
    "    second_half_reference = game_rows[game_rows.round_index == 15]\n",
    "\n",
    "    records = []\n",
    "    for rid in range(1, n_rounds + 1):\n",
    "        round_rows = game_rows[game_rows.round_index == rid]\n",
    "\n",
    "        guess_rev = round_rows.revised_guess.mean(skipna=True)\n",
    "        guess_ind = round_rows.independent_guess.mean(skipna=True)\n",
    "        # Round-level values are read from the first row of the round.\n",
    "        correct_an = round_rows.correct_answer.iloc[0]\n",
    "\n",
    "        record = {'game_id': game_id,\n",
    "                  'condition': condition,\n",
    "                  'round_index': rid,\n",
    "                  'independent_wisdom': guess_ind,\n",
    "                  'revised_wisdom': guess_rev,\n",
    "                  'revised_wisdom_error': abs(guess_rev - correct_an),\n",
    "                  'independent_wisdom_error': abs(guess_ind - correct_an),\n",
    "                  'correct_answer': correct_an,\n",
    "                  'task_instance_avg_error': round_rows.task_instance_avg_error.iloc[0]\n",
    "                  }\n",
    "\n",
    "        ## top_k errors for the trade off plot\n",
    "        quarter_rows = first_half_reference if rid < 11 else second_half_reference\n",
    "        record.update(_top_k_errors(round_rows, quarter_rows, correct_an, max_k))\n",
    "        records.append(record)\n",
    "\n",
    "    return records\n",
    "\n",
    "\n",
    "def game_data_with_top_cols(round_data, n_boot, seed=None):\n",
    "    \"\"\"Aggregate player-level round data into one row per game and round.\n",
    "\n",
    "    For the five group conditions every game_id yields 20 rows (rounds 1-20).\n",
    "    For the two solo conditions, n_boot synthetic games are built per condition\n",
    "    by drawing, with replacement, 4 players from each of the three plot\n",
    "    sequences; their game_id is '<boot index>_<condition>'.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    round_data : DataFrame with one row per player and round, including the\n",
    "        score columns final_score, final_ind_score, current_rev_score,\n",
    "        current_ind_score and half_cumulative.\n",
    "    n_boot : number of bootstrapped games per solo condition.\n",
    "    seed : optional int; when given, numpy's global random generator is seeded\n",
    "        first so the bootstrap draws are reproducible. The default (None)\n",
    "        leaves the generator state untouched.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    DataFrame with the group/revised guess summaries, the top_<k>_* error\n",
    "    columns (k = 1..12) and the round_after_shock, quarter and half columns.\n",
    "    \"\"\"\n",
    "    if seed is not None:\n",
    "        np.random.seed(seed)\n",
    "\n",
    "    game_data_list = []\n",
    "\n",
    "    ####### all conditions except solos\n",
    "    for condition in ['dynamic_no_feedback','dynamic_self_feedback','dynamic_full_feedback',  'static', 'dynamic']:\n",
    "        print('aggregating condition ', condition)\n",
    "        condition_rows = round_data[round_data.condition == condition]\n",
    "        for game in condition_rows.game_id.unique():\n",
    "            game_rows = condition_rows[condition_rows.game_id == game]\n",
    "            game_data_list.extend(_game_round_records(game, condition, game_rows))\n",
    "\n",
    "    ######### bootstrapped solo games\n",
    "    for solo_condition in ['solo_feedback', 'solo_no_feedback']:\n",
    "        solo_rows = round_data[round_data.condition == solo_condition]\n",
    "        # one pool of player ids per plot sequence\n",
    "        pools = [solo_rows[solo_rows.plot_sequence == sequence]['player_id'].unique()\n",
    "                 for sequence in ('easy->hard', 'medium->easy', 'hard->medium')]\n",
    "        print('Solo condition bootstrapping now is', solo_condition)\n",
    "\n",
    "        for j in range(0, n_boot):\n",
    "            print('Boot      ', j, end='\\r')\n",
    "\n",
    "            # select 12: 4 players (with replacement) from each pool\n",
    "            this_game_ids = []\n",
    "            for pool in pools:\n",
    "                this_game_ids += list(np.random.choice(pool, size=4, replace=True))\n",
    "\n",
    "            # isin() keeps every sampled player once; players drawn several\n",
    "            # times get their rows added again so each draw counts.\n",
    "            sampled_rows = round_data[round_data.player_id.isin(this_game_ids)]\n",
    "            pieces = [sampled_rows]\n",
    "            for player, times_drawn in Counter(this_game_ids).items():\n",
    "                pieces.extend([sampled_rows[sampled_rows.player_id == player]] * (times_drawn - 1))\n",
    "\n",
    "            boot_game_id = str(j) + '_' + solo_condition\n",
    "            # pd.concat returns a new frame (DataFrame.append no longer exists in\n",
    "            # pandas 2), so tagging it does not write into a slice of round_data.\n",
    "            this_game_round_data = pd.concat(pieces)\n",
    "            this_game_round_data['game_id'] = boot_game_id\n",
    "\n",
    "            game_data_list.extend(_game_round_records(boot_game_id, solo_condition, this_game_round_data))\n",
    "\n",
    "    game_data = pd.DataFrame(game_data_list)\n",
    "\n",
    "    # set time after shock: the round counter restarts after round 10\n",
    "    game_data['round_after_shock'] = np.where(game_data['round_index'] > 10,\n",
    "                                              game_data['round_index'] - 10,\n",
    "                                              game_data['round_index'])\n",
    "\n",
    "    # adding quarters (blocks of 5 rounds)\n",
    "    game_data['quarter'] = None\n",
    "    game_data.loc[game_data['round_index'] < 6,'quarter'] = 1\n",
    "    game_data.loc[(game_data['round_index'] >= 6) & (game_data['round_index'] < 11),'quarter'] = 2\n",
    "    game_data.loc[(game_data['round_index'] >= 11) & (game_data['round_index'] < 16),'quarter'] = 3\n",
    "    game_data.loc[game_data['round_index'] >= 16,'quarter'] = 4\n",
    "\n",
    "    # adding halves (blocks of 10 rounds)\n",
    "    game_data['half'] = None\n",
    "    game_data.loc[game_data.round_index < 11,'half'] = 1\n",
    "    game_data.loc[game_data.round_index > 10,'half'] = 2\n",
    "\n",
    "    return game_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "aggregating condition  dynamic_no_feedback\n",
      "aggregating condition  dynamic_self_feedback\n",
      "aggregating condition  dynamic_full_feedback\n",
      "aggregating condition  static\n",
      "aggregating condition  dynamic\n",
      "Solo condition bootstrapping now is solo_feedback\n",
      "Solo condition bootstrapping now is solo_no_feedback\n",
      "# of games per condition:\n",
      " solo_feedback            300\n",
      "solo_no_feedback         300\n",
      "dynamic                   20\n",
      "static                    20\n",
      "dynamic_self_feedback     15\n",
      "dynamic_full_feedback     15\n",
      "dynamic_no_feedback       15\n",
      "Name: condition, dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>game_id</th>\n",
       "      <th>condition</th>\n",
       "      <th>round_index</th>\n",
       "      <th>independent_wisdom</th>\n",
       "      <th>revised_wisdom</th>\n",
       "      <th>revised_wisdom_error</th>\n",
       "      <th>independent_wisdom_error</th>\n",
       "      <th>correct_answer</th>\n",
       "      <th>task_instance_avg_error</th>\n",
       "      <th>top_1_ind</th>\n",
       "      <th>...</th>\n",
       "      <th>top_11_rev_ex_ante</th>\n",
       "      <th>top_12_ind</th>\n",
       "      <th>top_12_rev</th>\n",
       "      <th>top_12_ind_live</th>\n",
       "      <th>top_12_rev_live</th>\n",
       "      <th>top_12_rev_ex_ante</th>\n",
       "      <th>round_after_shock</th>\n",
       "      <th>quarter</th>\n",
       "      <th>half</th>\n",
       "      <th>game_condition_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>LtnXEGCJzhBnyARpj</td>\n",
       "      <td>dynamic_no_feedback</td>\n",
       "      <td>1</td>\n",
       "      <td>0.486667</td>\n",
       "      <td>0.570000</td>\n",
       "      <td>0.480000</td>\n",
       "      <td>0.396667</td>\n",
       "      <td>0.09</td>\n",
       "      <td>0.274051</td>\n",
       "      <td>0.68</td>\n",
       "      <td>...</td>\n",
       "      <td>0.460000</td>\n",
       "      <td>0.480000</td>\n",
       "      <td>0.480000</td>\n",
       "      <td>0.480000</td>\n",
       "      <td>0.480000</td>\n",
       "      <td>0.480000</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>LtnXEGCJzhBnyARpjdynamic_no_feedback</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>LtnXEGCJzhBnyARpj</td>\n",
       "      <td>dynamic_no_feedback</td>\n",
       "      <td>2</td>\n",
       "      <td>0.724167</td>\n",
       "      <td>0.807500</td>\n",
       "      <td>0.027500</td>\n",
       "      <td>0.055833</td>\n",
       "      <td>0.78</td>\n",
       "      <td>0.229560</td>\n",
       "      <td>0.33</td>\n",
       "      <td>...</td>\n",
       "      <td>0.020000</td>\n",
       "      <td>0.027500</td>\n",
       "      <td>0.027500</td>\n",
       "      <td>0.027500</td>\n",
       "      <td>0.027500</td>\n",
       "      <td>0.027500</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>LtnXEGCJzhBnyARpjdynamic_no_feedback</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>LtnXEGCJzhBnyARpj</td>\n",
       "      <td>dynamic_no_feedback</td>\n",
       "      <td>3</td>\n",
       "      <td>0.596667</td>\n",
       "      <td>0.487500</td>\n",
       "      <td>0.422500</td>\n",
       "      <td>0.313333</td>\n",
       "      <td>0.91</td>\n",
       "      <td>0.304500</td>\n",
       "      <td>0.26</td>\n",
       "      <td>...</td>\n",
       "      <td>0.398182</td>\n",
       "      <td>0.422500</td>\n",
       "      <td>0.422500</td>\n",
       "      <td>0.422500</td>\n",
       "      <td>0.422500</td>\n",
       "      <td>0.422500</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>LtnXEGCJzhBnyARpjdynamic_no_feedback</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>LtnXEGCJzhBnyARpj</td>\n",
       "      <td>dynamic_no_feedback</td>\n",
       "      <td>4</td>\n",
       "      <td>0.489167</td>\n",
       "      <td>0.470833</td>\n",
       "      <td>0.000833</td>\n",
       "      <td>0.019167</td>\n",
       "      <td>0.47</td>\n",
       "      <td>0.194340</td>\n",
       "      <td>0.08</td>\n",
       "      <td>...</td>\n",
       "      <td>0.010000</td>\n",
       "      <td>0.000833</td>\n",
       "      <td>0.000833</td>\n",
       "      <td>0.000833</td>\n",
       "      <td>0.000833</td>\n",
       "      <td>0.000833</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>LtnXEGCJzhBnyARpjdynamic_no_feedback</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>LtnXEGCJzhBnyARpj</td>\n",
       "      <td>dynamic_no_feedback</td>\n",
       "      <td>5</td>\n",
       "      <td>0.439167</td>\n",
       "      <td>0.408333</td>\n",
       "      <td>0.328333</td>\n",
       "      <td>0.359167</td>\n",
       "      <td>0.08</td>\n",
       "      <td>0.219937</td>\n",
       "      <td>0.22</td>\n",
       "      <td>...</td>\n",
       "      <td>0.283636</td>\n",
       "      <td>0.328333</td>\n",
       "      <td>0.328333</td>\n",
       "      <td>0.328333</td>\n",
       "      <td>0.328333</td>\n",
       "      <td>0.328333</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>LtnXEGCJzhBnyARpjdynamic_no_feedback</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 73 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             game_id            condition  round_index  independent_wisdom  \\\n",
       "0  LtnXEGCJzhBnyARpj  dynamic_no_feedback            1            0.486667   \n",
       "1  LtnXEGCJzhBnyARpj  dynamic_no_feedback            2            0.724167   \n",
       "2  LtnXEGCJzhBnyARpj  dynamic_no_feedback            3            0.596667   \n",
       "3  LtnXEGCJzhBnyARpj  dynamic_no_feedback            4            0.489167   \n",
       "4  LtnXEGCJzhBnyARpj  dynamic_no_feedback            5            0.439167   \n",
       "\n",
       "   revised_wisdom  revised_wisdom_error  independent_wisdom_error  \\\n",
       "0        0.570000              0.480000                  0.396667   \n",
       "1        0.807500              0.027500                  0.055833   \n",
       "2        0.487500              0.422500                  0.313333   \n",
       "3        0.470833              0.000833                  0.019167   \n",
       "4        0.408333              0.328333                  0.359167   \n",
       "\n",
       "   correct_answer  task_instance_avg_error  top_1_ind  ...  \\\n",
       "0            0.09                 0.274051       0.68  ...   \n",
       "1            0.78                 0.229560       0.33  ...   \n",
       "2            0.91                 0.304500       0.26  ...   \n",
       "3            0.47                 0.194340       0.08  ...   \n",
       "4            0.08                 0.219937       0.22  ...   \n",
       "\n",
       "   top_11_rev_ex_ante  top_12_ind  top_12_rev  top_12_ind_live  \\\n",
       "0            0.460000    0.480000    0.480000         0.480000   \n",
       "1            0.020000    0.027500    0.027500         0.027500   \n",
       "2            0.398182    0.422500    0.422500         0.422500   \n",
       "3            0.010000    0.000833    0.000833         0.000833   \n",
       "4            0.283636    0.328333    0.328333         0.328333   \n",
       "\n",
       "   top_12_rev_live  top_12_rev_ex_ante  round_after_shock  quarter  half  \\\n",
       "0         0.480000            0.480000                  1        1     1   \n",
       "1         0.027500            0.027500                  2        1     1   \n",
       "2         0.422500            0.422500                  3        1     1   \n",
       "3         0.000833            0.000833                  4        1     1   \n",
       "4         0.328333            0.328333                  5        1     1   \n",
       "\n",
       "                      game_condition_id  \n",
       "0  LtnXEGCJzhBnyARpjdynamic_no_feedback  \n",
       "1  LtnXEGCJzhBnyARpjdynamic_no_feedback  \n",
       "2  LtnXEGCJzhBnyARpjdynamic_no_feedback  \n",
       "3  LtnXEGCJzhBnyARpjdynamic_no_feedback  \n",
       "4  LtnXEGCJzhBnyARpjdynamic_no_feedback  \n",
       "\n",
       "[5 rows x 73 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the game-level round table from the player-level round_data.\n",
    "# NOTE(review): game_data_with_top_cols is defined outside this cell; confirm there\n",
    "# what the second argument (300) controls.\n",
    "game_rounds = game_data_with_top_cols(round_data, 300)\n",
    "\n",
    "# Key that identifies one game within one condition: game_id concatenated with condition.\n",
    "game_rounds['game_condition_id'] = game_rounds['game_id'].map(str) + game_rounds['condition'].map(str)\n",
    "\n",
    "# Sanity check: count the distinct (game, condition) pairs per condition.\n",
    "games_per_condition = game_rounds[['game_condition_id', 'condition']].drop_duplicates()['condition'].value_counts()\n",
    "print(\"# of games per condition:\\n\", games_per_condition)\n",
    "game_rounds.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "<br>\n",
    "<br>\n",
    "<br>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label each row with its study: the three conditions below are study 1, every other condition is study 2.\n",
    "study_1_conditions = ['static','dynamic','solo_feedback']\n",
    "in_study_1 = game_rounds.condition.isin(study_1_conditions)\n",
    "game_rounds.loc[in_study_1, 'study'] = 1\n",
    "game_rounds.loc[~in_study_1, 'study'] = 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Express each row's revised_wisdom_error relative to the solo baseline of its study:\n",
    "#   study 1 -> per-round mean revised_wisdom_error of the 'solo_feedback' rows\n",
    "#   study 2 -> per-round mean revised_wisdom_error of the 'solo_no_feedback' rows\n",
    "\n",
    "def _solo_round_error(study, solo_condition):\n",
    "    \"\"\"Return {round_index: mean revised_wisdom_error} over the solo rows of one study.\n",
    "\n",
    "    Rounds without any matching solo row are absent from the dict, so looking them\n",
    "    up through Series.map yields NaN.\n",
    "    \"\"\"\n",
    "    is_solo = (game_rounds.study == study) & (game_rounds.condition == solo_condition)\n",
    "    return game_rounds[is_solo].groupby('round_index')['revised_wisdom_error'].mean().to_dict()\n",
    "\n",
    "study_1_round_error = _solo_round_error(1, 'solo_feedback')\n",
    "study_2_round_error = _solo_round_error(2, 'solo_no_feedback')\n",
    "\n",
    "# Choose the baseline row by row: study 1 rows use the study 1 table, all other rows the study 2 table.\n",
    "# This is a vectorised replacement for the former iterrows()/DataFrame.set_value loop;\n",
    "# set_value no longer exists in pandas >= 1.0.\n",
    "solo_baseline = np.where(game_rounds.study == 1,\n",
    "                         game_rounds.round_index.map(study_1_round_error),\n",
    "                         game_rounds.round_index.map(study_2_round_error))\n",
    "game_rounds['revised_error_relative2solo'] = game_rounds['revised_wisdom_error'] - solo_baseline\n",
    "\n",
    "# Treat infinite differences as missing values.\n",
    "game_rounds.loc[np.isinf(game_rounds.revised_error_relative2solo), 'revised_error_relative2solo'] = np.nan\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Write game_rounds to a CSV file in the working directory (to_csv is called with its default options).\n",
    "game_rounds.to_csv('./all_studies_game_data_revision.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
